Link about it: http://www2.camara.leg.br/transparencia/acesso-a-informacao/copy_of_perguntas-frequentes/cota-para-o-exercicio-da-atividade-parlamentar
For this lab we gonna need the following package to be installed
#install.packages("ggplot2")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("scales")
#install.packages("plotly")
Loading the libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(scales)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Set up the workspace folder
setwd("~/git/data-analysis/Lab01")
Loading the data
data <- read.csv(("/home/josemsf/Downloads/dadosCEAP.csv"))
montly_limit <- read.csv(("/home/josemsf/Downloads/limiteMensalCEAP.csv"))
data$valorGlosa <- as.numeric(sub(",", ".", data$valorGlosa, fixed = TRUE))
data %>%
full_join(montly_limit, by=c("sgUF" = "UF")) -> data
Here we have two graphics that show the top 10 deputies that have expeended more money and the top 10 more economic.
greater_expenses <- data %>%
group_by(nomeParlamentar) %>%
filter(valorLíquido >= 0) %>%
summarise(dep_expenses = sum(valorLíquido))
greater_expenses <- greater_expenses[order(greater_expenses$dep_expenses),]
filter_expensies <- rbind(tail(greater_expenses, 10))
ggplot(filter_expensies, aes(y = dep_expenses, x = reorder(nomeParlamentar, dep_expenses)), top_n(x, 10)) + order(decreasing = TRUE) + scale_y_continuous(labels = comma) +
geom_bar(stat="identity") +
labs(title = "The Deputies That Have Expeended More Money",
x = "Deputies", y = "Expenses in RS") +
coord_flip()
Here we can see 10 deputies that have expeend more money which are:
filter_expensies_minors <- rbind(head(greater_expenses, 10))
ggplot(filter_expensies_minors, aes(y = dep_expenses, x = reorder(nomeParlamentar, dep_expenses)), top_n(x, 10)) + order(decreasing = TRUE) + scale_y_continuous(labels = comma) +
geom_bar(stat="identity") +
labs(title = "The Deputies That Have Expended Less Money",
x = "Deputies", y = "Expenses in RS") +
coord_flip()
Now those are the ones who have expeended less money:
expending_abord <- data %>%
group_by(sgUF) %>%
filter(tipoDocumento == 2, valorLíquido >= 0) %>%
summarise(expense = sum(valorLíquido))
expending_abord <- expending_abord[order(expending_abord$expense),]
expending_abord$sgUF <- factor(expending_abord$sgUF, levels = expending_abord$sgUF[order(expending_abord$expense)])
ggplot(expending_abord, aes(x = sgUF, y = expense)) + theme_bw() + geom_bar(stat = "identity") +
scale_y_continuous(labels = comma) +
geom_bar(stat="identity") +
labs(title = "The Deputies That Have Expeended Less Money Abroad",
x = "Deputies", y = "Expenses in RS") +
coord_flip()
As we can see the hilight is São Paulo which have been the most expenses to abroad whit RS 102366.56 followed by Minas Gerais whit RS 79767.77 and Pernambuco whit RS 70915.94. And alse we hilight Maranhão that have been expended only RS 40.99 and Paraíba whit RS 2288.29..
expense_by_group <- data %>%
group_by(sgPartido) %>%
filter(valorLíquido >= 0, sgUF == "PB") %>%
summarise(expense = sum(abs(valorLíquido)))
expense_by_group$sgUF <- factor(expense_by_group$sgPartido, levels = expense_by_group$sgPartido[order(expense_by_group$expense)])
ggplot(expense_by_group, aes(y = expense, x = reorder(sgPartido, expense))) + order(decreasing = TRUE) + scale_y_continuous(labels = comma) +
geom_bar(stat="identity") +
labs(title = "Grups Expenses",
x = "Grups", y = "Expenses in RS") +
coord_flip()
PMDB is the group that most uses CEP whit a value of RS 4011621.34 followed by PR whit RS 1434506.56
limit_exceeded <- data %>%
mutate(ano = substr(dataEmissao, 1, 4)) %>%
mutate(mes = substr(dataEmissao, 6, 7)) %>%
group_by(nomeParlamentar, limite_mensal, mes, ano) %>%
filter(valorLíquido >= 0) %>%
summarise(expense = sum(valorLíquido)) %>%
filter(expense > limite_mensal)
limit_exceeded <- limit_exceeded %>%
group_by(nomeParlamentar) %>%
summarise(times_exceeded = n())
limit_exceeded <- limit_exceeded[order(limit_exceeded$times_exceeded, decreasing = TRUE),]
limit_exceeded$indexGasto <- factor(limit_exceeded$nomeParlamentar, levels = limit_exceeded$nomeParlamentar)
limit_exceeded %>%
plot_ly(x= ~indexGasto, y= ~times_exceeded,type= "scatter", mode= "lines+markers") %>%
layout(title="Number of time the deputies have exceeded the monthly limit of expenses",
xaxis=list(title="Deputies", range= c(0,10)),
yaxis=list(title="Times of limit exceeded"), barmode="stack")
Our data show that at least one deputie have passed his state CEAP limit, but ther are some most frequent, which are:
We are going to look only to expenses whit the description about airline tickes, once whit that information we will sum all values by states.
flyies_expenses <- data %>%
group_by(sgUF) %>%
filter(tipoDespesa == "Emissão Bilhete Aéreo", valorLíquido >= 0) %>%
summarise(expense = sum(valorLíquido))
flyies_expenses$sgUF <- factor(flyies_expenses$sgUF, levels = flyies_expenses$sgUF[order(flyies_expenses$expense)])
ggplot(flyies_expenses, aes(x = sgUF, y = expense)) + theme_bw() + geom_bar(stat = "identity") + scale_y_continuous(labels = comma) +
labs(title = "State whit its epenses whit airline tickets",
x = "Value in RS", y = "State")
So what we’ve got it São Paulo is the state that expend more money then the other whit ailine ticket, and the total expeended was about RS 23171817.73 and Rio de Janeiro whit RS16755188.64
Gonna chose PMDB, PT and DEM and take a close look in its expenses.
expense_most_required <- data %>%
group_by(tipoDespesa) %>%
filter(sgPartido %in% c("PMDB", "PT", "DEM"), valorLíquido >= 0) %>%
summarise(expense = sum(valorLíquido))
expense_most_required$tipoDespesa <- factor(expense_most_required$tipoDespesa, levels = expense_most_required$tipoDespesa[order(expense_most_required$expense)])
ggplot(expense_most_required, aes(y = expense, x = reorder(tipoDespesa, expense)), top_n(x, 10)) + order(decreasing = TRUE) + scale_y_continuous(labels = comma) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_bar(stat="identity") +
labs(title = "Expenses by the groups PMDB, PT and DEM",
y = "Expenses in RS", x = "Type of expenses") +
coord_flip()
12 diferentes types of expenses and as far we can see the most expensice is airline tickets whit RS 47353189.62 and very near we got deputies activities publicities whit RS 40119480.33.